/* $Id: Bioseq.cpp 610969 2020-06-26 12:56:10Z grichenk $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  .......
 *
 * File Description:
 *   .......
 *
 * Remark:
 *   This code was originally generated by application DATATOOL
 *   using specifications from the ASN data definition file
 *   'seq.asn'.
 *
 */

// standard includes
#include <ncbi_pch.hpp>
#include <corelib/ncbiutil.hpp>
#include <serial/enumvalues.hpp>
#include <serial/typeinfo.hpp>

// generated includes
#include <objects/general/Object_id.hpp>

#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Delta_ext.hpp>
#include <objects/seq/Delta_seq.hpp>
#include <objects/seq/IUPACna.hpp>
#include <objects/seq/NCBI4na.hpp>
#include <objects/seq/NCBI8na.hpp>
#include <objects/seq/Seq_annot.hpp>
#include <objects/seq/Seq_data.hpp>
#include <objects/seq/Seq_descr.hpp>
#include <objects/seq/Seq_ext.hpp>
#include <objects/seq/Seq_inst.hpp>
#include <objects/seq/Seq_hist.hpp>

#include <objects/general/Dbtag.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seqalign/Seq_align.hpp>
#include <objects/seqalign/seqalign_exception.hpp>
#include <objects/seqfeat/Org_ref.hpp>
#include <objects/seqfeat/BioSource.hpp>

#include <objects/seqloc/Seq_id.hpp>
#include <objects/seqloc/Seq_interval.hpp>
#include <objects/seqloc/Seq_loc.hpp>
#include <objects/seqloc/Seq_point.hpp>
#include <objects/seqloc/Textseq_id.hpp>
#include <objects/misc/error_codes.hpp>


#define NCBI_USE_ERRCODE_X   Objects_Bioseq

// generated classes

BEGIN_NCBI_SCOPE

BEGIN_objects_SCOPE // namespace ncbi::objects::

/// Destructor.
///
/// Performs no explicit work in this file; members are released by their
/// own destructors.
CBioseq::~CBioseq()
{
}

/// User-level assignment hook.
///
/// Intentionally a no-op: nothing is copied from the source object here
/// (the parameter is unnamed and unused).
void CBioseq::UserOp_Assign(const CSerialUserOp&)
{
    // nothing to copy
}

/// User-level equality hook.
///
/// The other object is not inspected, so this hook never reports a
/// difference.
///
/// @return Always true.
bool CBioseq::UserOp_Equals(const CSerialUserOp&) const
{
    const bool always_equal = true;
    return always_equal;
}


// Counter used to generate "constructed<N>" local ids for Bioseqs built
// from a Seq-loc when the caller supplies no id string; starts at 0 and is
// post-incremented on each such construction.
int CBioseq::sm_ConstructedId = 0;

void CBioseq::x_SeqLoc_To_DeltaExt(const CSeq_loc& loc, CDelta_ext& ext)
{
    switch ( loc.Which() ) {
    case CSeq_loc::e_Packed_int:
        {
            // extract each range, create and add simple location
            ITERATE ( CPacked_seqint::Tdata, ii, loc.GetPacked_int().Get() ) {
                CRef<CDelta_seq> dseq(new CDelta_seq);
                dseq->SetLoc().SetInt().Assign(**ii);
                ext.Set().push_back(dseq);
            }
            break;
        }
    case CSeq_loc::e_Packed_pnt:
        {
            // extract each point
            ITERATE ( CPacked_seqpnt::TPoints, pi,
                      loc.GetPacked_pnt().GetPoints() ) {
                CRef<CSeq_loc> pnt_loc(new CSeq_loc);
                pnt_loc->SetPnt().SetId().Assign(loc.GetPacked_pnt().GetId());
                pnt_loc->SetPnt().SetPoint(*pi);
                if ( loc.GetPacked_pnt().IsSetStrand() ) {
                    pnt_loc->SetPnt().SetStrand(
                        loc.GetPacked_pnt().GetStrand());
                }
                CRef<CDelta_seq> dseq(new CDelta_seq);
                dseq->SetLoc(*pnt_loc);
                ext.Set().push_back(CRef<CDelta_seq>(dseq));
            }
        }
    case CSeq_loc::e_Mix:
        {
            // extract sub-locations
            ITERATE ( CSeq_loc_mix::Tdata, li, loc.GetMix().Get() ) {
                x_SeqLoc_To_DeltaExt(**li, ext);
            }
            return;
        }
    default:
        {
            // Just add the location
            CDelta_seq* dseq = new CDelta_seq;
            CSeq_loc* cp_loc = new CSeq_loc;
            SerialAssign<CSeq_loc>(*cp_loc, loc);
            dseq->SetLoc(*cp_loc);
            ext.Set().push_back(CRef<CDelta_seq>(dseq));
        }
    }
}


/// Construct a Bioseq describing the given location.
///
/// The new Bioseq gets a single local string id and an Inst whose delta
/// extension is filled from @a loc via x_SeqLoc_To_DeltaExt().
///
/// @param loc     Location whose pieces become the delta extension entries.
/// @param str_id  Local id string to use; when empty, "constructed<N>" is
///                generated from the class-wide counter sm_ConstructedId.
CBioseq::CBioseq(const CSeq_loc& loc, const string& str_id)
    : m_ParentEntry(0)
{
    CBioseq::TId& id_list = SetId();

    // Id -- owned by a CRef from creation so it cannot leak if building
    // the id string throws.
    CRef<CSeq_id> id(new CSeq_id);
    if ( str_id.empty() ) {
        // NOTE(review): sm_ConstructedId is a plain int incremented without
        // synchronization -- confirm concurrent construction is not expected.
        id->SetLocal().SetStr("constructed" + NStr::IntToString(sm_ConstructedId++));
    }
    else {
        id->SetLocal().SetStr(str_id);
    }
    id_list.push_back(id);

    // Inst
    // NOTE(review): repr is set to 'const' although a delta extension is
    // populated below -- confirm this combination is intended.
    CSeq_inst& inst = SetInst();
    inst.SetRepr(CSeq_inst::eRepr_const);
    inst.SetMol(CSeq_inst::eMol_other);

    CDelta_ext& ext = inst.SetExt().SetDelta();
    x_SeqLoc_To_DeltaExt(loc, ext);
}


/// Determine the tax-id for this bioseq.
///
/// Descriptors are scanned in order.  The first source descriptor that
/// yields a non-zero tax-id wins outright; otherwise the tax-id taken from
/// the most recently seen org descriptor is returned (ZERO_TAX_ID when there
/// is none, or when no descriptors are set).
///
/// @return Tax-id from a source descriptor if available, else from an org
///         descriptor, else ZERO_TAX_ID.
TTaxId CBioseq::GetTaxId() const
{
    if ( !IsSetDescr() ) {
        return ZERO_TAX_ID;
    }

    // Fallback value; only used when no source descriptor supplies a tax-id.
    TTaxId org_taxid = ZERO_TAX_ID;

    ITERATE (TDescr::Tdata, desc_it, GetDescr().Get()) {
        const CSeqdesc& desc = **desc_it;
        if (desc.IsOrg()) {
            org_taxid = desc.GetOrg().GetTaxId();
            continue;
        }
        if (desc.IsSource()  &&  desc.GetSource().IsSetOrg()) {
            const TTaxId source_taxid = desc.GetSource().GetOrg().GetTaxId();
            if (source_taxid != ZERO_TAX_ID) {
                // Source descriptors take precedence over org descriptors.
                return source_taxid;
            }
        }
    }

    return org_taxid;
}


/// Append a textual label for this Bioseq to @a label.
///
/// Unless @a type is eType, and provided the Bioseq has at least one id, a
/// FASTA-style id is appended first.  Unless @a type is eContent, the
/// representation and molecule names ("repr,mol") follow, separated from
/// any preceding text by ": ", plus " len=<N>" when the length is set.
///
/// @param label  Output string, appended to; a null pointer makes this a no-op.
/// @param type   Which parts of the label to produce.
/// @param worst  When true, use the id chosen by CSeq_id::WorstRank (with the
///               text-id name removed); otherwise use the first id, replaced
///               by the last Other/GenBank/EMBL/DDBJ id that has an accession.
void CBioseq::GetLabel(string* label, ELabelType type, bool worst) const
{
    if (label == NULL) {
        return;
    }

    if (type != eType  &&  !GetId().empty()) {
        // Holds the modified copy when 'worst' is requested; must outlive
        // 'chosen', which may point at it.
        CSeq_id        scratch_id;
        const CSeq_id* chosen = NULL;

        if (worst) {
            const CSeq_id* worst_ranked =
                FindBestChoice(GetId(), CSeq_id::WorstRank).GetPointer();
            if (worst_ranked != NULL) {
                scratch_id.Assign(*worst_ranked);
                CTextseq_id* text_id =
                    const_cast<CTextseq_id*>(scratch_id.GetTextseq_Id());
                if (text_id != NULL) {
                    text_id->ResetName();
                }
                chosen = &scratch_id;
            }
        } else {
            // Start with the first id, then let every later id that has an
            // accession override it.
            chosen = GetId().begin()->GetPointer();
            ITERATE (CBioseq::TId, id_it, GetId()) {
                const CSeq_id&          candidate = **id_it;
                const CSeq_id::E_Choice kind      = candidate.Which();
                const bool accession_type =
                    kind == CSeq_id::e_Other    ||
                    kind == CSeq_id::e_Genbank  ||
                    kind == CSeq_id::e_Embl     ||
                    kind == CSeq_id::e_Ddbj;
                if (accession_type  &&
                    candidate.GetTextseq_Id()->IsSetAccession()) {
                    chosen = &candidate;
                }
            }
        }

        if (chosen != NULL) {
            CNcbiOstrstream fasta_stream;
            chosen->WriteAsFasta(fasta_stream);
            const string fasta = CNcbiOstrstreamToString(fasta_stream);
            *label += fasta;
        }
    }

    if (type == eContent) {
        return;
    }

    if ( !label->empty() ) {
        *label += ": ";
    }

    const CSeq_inst& inst = GetInst();

    const CEnumeratedTypeValues* repr_names =
        CSeq_inst::GetTypeInfo_enum_ERepr();
    *label += repr_names->FindName(inst.GetRepr(), true);
    *label += ",";

    const CEnumeratedTypeValues* mol_names =
        CSeq_inst::GetTypeInfo_enum_EMol();
    *label += mol_names->FindName(inst.GetMol(), true);

    if (inst.IsSetLength()) {
        *label += string(" len=") + NStr::IntToString(inst.GetLength());
    }
}


/// Return the first id in this Bioseq's id list.
///
/// @return Pointer to the first id, or 0 when the id list is empty (which
///         is not expected for a valid Bioseq).
const CSeq_id* CBioseq::GetFirstId() const
{
    const TId& ids = GetId();
    if ( !ids.empty() ) {
        return ids.front().GetPointer();
    }
    return 0;
}

/// Scoring function for FindBestChoice() that favors non-local ids.
///
/// Empty references score kMax_Int, local ids score kMax_Int - 1, and every
/// other id scores its BestRankScore().
static int s_BestNonLocalRank(const CRef<CSeq_id>& id)
{
    if (id.Empty()) {
        return kMax_Int;
    }
    if (id->IsLocal()) {
        // One below the maximum, so a local id still beats an empty one.
        return kMax_Int - 1;
    }
    return id->BestRankScore();
}

/// Find a non-local id for this Bioseq.
///
/// The best-ranked id from the id list is returned directly when it is not
/// local.  If only a local id is available, the history assembly alignments
/// are searched for a two-row alignment pairing that local id with a
/// non-local one, and the non-local partner is returned.
///
/// @return Pointer to a non-local id, or NULL when none can be found.
const CSeq_id* CBioseq::GetNonLocalId() const
{
    CRef<CSeq_id> best = FindBestChoice(GetId(), &s_BestNonLocalRank);
    if (best.Empty()) {
        return NULL; // No way to verify potential IDs found elsewhere
    }
    if ( !best->IsLocal() ) {
        return best.GetPointer();
    }

    // Only a local id is present: try to map it through the assembly history.
    const CSeq_inst& inst = GetInst();
    const bool has_assembly =
        inst.CanGetHist()  &&  inst.GetHist().CanGetAssembly();
    if ( !has_assembly ) {
        return NULL;
    }

    ITERATE (CSeq_hist::TAssembly, aln_it, inst.GetHist().GetAssembly()) {
        const CSeq_align& aln = **aln_it;

        bool is_pairwise = false;
        try {
            is_pairwise = (aln.CheckNumRows() == 2);
        } catch (CSeqalignException&) {
            // fails basic validation; treated the same as "not pairwise"
        }
        if ( !is_pairwise ) {
            continue;
        }

        const CSeq_id& row0 = aln.GetSeq_id(0);
        const CSeq_id& row1 = aln.GetSeq_id(1);
        if (row0.IsLocal()  &&  row0.Match(*best)  &&  !row1.IsLocal()) {
            return &row1;
        }
        if (row1.IsLocal()  &&  row1.Match(*best)  &&  !row0.IsLocal()) {
            return &row0;
        }
    }

    return NULL;
}


/// Scoring function for FindBestChoice() that only considers local ids.
///
/// Local ids score their BestRankScore(); empty references and non-local
/// ids score kMax_Int.
static int s_BestLocalRank(const CRef<CSeq_id>& id)
{
    const bool is_local = id.NotEmpty()  &&  id->IsLocal();
    return is_local ? id->BestRankScore() : kMax_Int;
}

/// Find a local id for this Bioseq.
///
/// @return Pointer to the local id selected by s_BestLocalRank, or NULL
///         when the id list yields no local id.
const CSeq_id* CBioseq::GetLocalId() const
{
    CRef<CSeq_id> best = FindBestChoice(GetId(), &s_BestLocalRank);
    if (best.Empty()  ||  !best->IsLocal()) {
        return NULL;
    }
    return best.GetPointer();
}

bool CBioseq::IsNa(void) const
{
    return GetInst ().IsNa ();
}

bool CBioseq::IsAa(void) const
{
    return GetInst ().IsAa ();
}

bool CBioseq::IsSetLength(void) const
{
    return GetInst ().IsSetLength ();
}

/// Forward to CSeq_inst::GetLength() on this Bioseq's Inst.
TSeqPos CBioseq::GetLength(void) const
{
    const CSeq_inst& inst = GetInst();
    return inst.GetLength();
}

/// Try to convert this Bioseq's raw sequence data into a delta extension.
///
/// Nothing is done when the Inst is an amino-acid sequence, has no sequence
/// data, or already has an extension, nor when the data is Ncbi2na.
/// Unsupported encodings are reported with a warning and left untouched.
/// Otherwise the data is handed to CDelta_ext::AddAndSplit(); if that
/// yields more than one delta entry the Inst is switched to the delta
/// representation and the raw data dropped, else the extension is removed
/// again and the Inst is left as it was.
///
/// @param gaps_ok  Passed through to CDelta_ext::AddAndSplit().
void CBioseq::PackAsDeltaSeq(bool gaps_ok)
{
    CSeq_inst& inst = SetInst();
    if (inst.IsAa()  ||  !inst.IsSetSeq_data()  ||  inst.IsSetExt()) {
        return;
    }
    const CSeq_data& data = inst.GetSeq_data();
    CTempString      src;
    switch (data.Which()) {
    case CSeq_data::e_Ncbi2na:
        return; // optimal as is
// Use data() rather than &container[0]: indexing element 0 of an empty
// vector is undefined, while data() is valid for any size.
#define CODING_CASE(x) \
    case CSeq_data::e_##x: \
        src.assign(data.Get##x().Get().data(), data.Get##x().Get().size()); \
        break;
    CODING_CASE(Iupacna)
    CODING_CASE(Iupacaa)
    CODING_CASE(Ncbi4na)
    CODING_CASE(Ncbi8na)
    CODING_CASE(Ncbi8aa)
    CODING_CASE(Ncbieaa)
    CODING_CASE(Ncbistdaa)
#undef CODING_CASE
    default:
        ERR_POST_X(1, Warning << "PackAsDeltaSeq: unsupported encoding "
                      << CSeq_data::SelectionName(data.Which()));
        return;
    }

    // 'src' only views the bytes owned by 'data', so the sequence data must
    // stay in place until AddAndSplit() has finished.
    CDelta_ext& ext = inst.SetExt().SetDelta();
    ext.AddAndSplit(src, data.Which(), inst.GetLength(), gaps_ok);
    if (ext.Get().size() > 1) { // finalize
        inst.SetRepr(CSeq_inst::eRepr_delta);
        inst.ResetSeq_data();
    } else { // roll back
        inst.ResetExt();
    }
}


END_objects_SCOPE // namespace ncbi::objects::

END_NCBI_SCOPE

#undef NCBI_USE_ERRCODE_X

/* Original file checksum: lines: 61, chars: 1871, CRC32: 1d5d7d05 */
